rm(list=ls(all=TRUE))
library(zoo)
library(plm)
library(reshape2)
library(data.table)
library(xlsx)
library(xtable)
library(DataCombine)
library(plyr)
library(vioplot)
library(ggplot2)
library(scales)
library(grid)
library(memisc)
setwd("~/Dropbox/LagIdentification/jop_final_version/replication")

#run simulation to replicate Figure 7
simulation<-function(phi,rho,kappa,beta,delta) {
    # panel properties
    #phi <- 0.5
    #rho <- 0.5
    #kappa <- 0.5
    #beta <- 2
    #delta <- 1
    units<- 100
    time<- 50
    n<-units*time
    id <- seq(1:units)
    
    # generate fixed effects
    fe <- rnorm(units,0,1)
    
    # merge fes and create panel
    frame <- data.frame(id, fe)
    panel<-expand.grid(seq(1:time),seq(1:units))
    names(panel)<-c("t","id")
    total <- merge(frame,panel,by=c("id"), all=T)
    
    # function to generate individual panels of u = phi*u_t-1 + nu, nu~N(0,1)
    # and x = rho*x_t-1 + kappa*u + eta, eta~N(0,1)
    gen.panel<-function(nothing){
        u<-vector(length=time)
        nu<-rnorm(time) # white noise
        u[1]<-nu[1] # random first value
        for(i in 2:length(u)) {
            u[i]  <- phi*u[i-1] + nu[i]
        }
        x<-vector(length=time)
        eta<-rnorm(time) # white noise
        x[1]<-eta[1] # random first value
        for(i in 2:length(u)) {
            x[i]  <- rho*x[i-1] + kappa*u[i]+ eta[i]
        }
        lagx<-vector(length=time)
        lagx[1]<- NA
        for(i in 2:length(u)) {
            lagx[i]  <- x[i-1]
        }
        y<-vector(length=time)
        e<-rnorm(time,0,5) # white noise
        y[1]<-y[1] # random first value
        for(i in 2:length(e)) {
            y[i]  <- beta*lagx[i] + delta*u[i]+ e[i]
        }
        lagy<-vector(length=time)
        lagy[1]<- NA # random first value
        for(i in 2:length(e)) {
            lagy[i]  <- y[i-1]
        }
        results<-data.frame(cbind(y,lagy,x,lagx,u,e,eta,nu))
        return(results) # drop first value
    }
    
    # create full panels
    dat<-do.call( rbind, replicate(units, gen.panel(1), simplify=FALSE ) )
    ## these functions might also work for x and u...
    #u<-as.vector(replicate(units,arima.sim(n=time,list(ar=phi))))
    #x<-as.vector(replicate(units,arima.sim(n=time,list(ar=rho))))+kappa*u
    
    # generate regression error term e
    dat$e<-rnorm(n,0,1)
    
    # create plm object
    predictors<-data.table(dat,total)
    setkeyv(predictors,c('id','t'))
    data<-plm.data(predictors, c("id","t"))
    correct <- lm(y~lagx+u, data=data)
    correct_beta<- summary(correct)$coefficients[2,1]
    correct_se<- summary(correct)$coefficients[2,2]
    correct_t<- abs(correct_beta/correct_se)
    ldv <- lm(y~ lagx + lagy, data=data)
    ldv_beta<- summary(ldv)$coefficients[2,1]
    ldv_se<- summary(ldv)$coefficients[2,2]
    ldv_t<- abs(ldv_beta/ldv_se)
    lagid <- lm(y~lagx, data=data)
    lagid_beta<- summary(lagid)$coefficients[2,1]
    lagid_se<- summary(lagid)$coefficients[2,2]
    lagid_t<- abs(lagid_beta/lagid_se)
    #outputs <- c(phi,rho,kappa,beta,delta,correct,ldv,lagid)
    #outputs
    outputs <- cbind(phi,rho,kappa,beta,delta,correct_beta,correct_se,correct_t,ldv_beta,ldv_se,ldv_t,lagid_beta,lagid_se,lagid_t)
    return(outputs)
}


#lag.simulate<-Simulate(
#simulation(phi,rho,kappa,beta,delta),
#expand.grid(
#phi<-c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9),
#rho<- .5, #define the list of values rho takes
#kappa<- .5, #define the list of values kappa takes.
#beta<- 2,
#delta<- 1
#),
#nsim=1,
#trace=10
#)

phi_list<-c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9)
rho_list<-c(.5)
kappa_list<-c(0,.5,2)
beta_list<-c(0,2)
delta_list<-c(1)

nphi <- length(phi_list)
nrho <- length(rho_list)
nkappa <- length(kappa_list)
nbeta <- length(beta_list)
ndelta <- length(delta_list)
numparams<- 15 #set the number of columns in the output (results as defined above) to which results are saved for each iteration
reps <- 100 #set the number of iteration
d <- data.frame(matrix(, nrow = nphi*nrho*nkappa*nbeta*ndelta*reps, ncol = numparams)) #d is the matrix that we save our results to.
i <- 0 #define i so that we can use it to save results to d[i,] for each iteration
output = NULL
for(phi in phi_list) {
    print(phi)
    for(rho in rho_list) {
        for(kappa in kappa_list) {
            print(kappa)
            for(beta in beta_list) {
                for(delta in delta_list) {
                    for(i3 in 1:reps) {
                        i <- i + 1
                        d[i,] <- cbind(i3,simulation(phi,rho,kappa,beta,delta)) #for each iteration i3, which ranges from 0 through 1000, we save the results computed from model(n,rho_y,rho_x).
                    }
                }
            }
        }
    }
}

sim_results<-data.frame(d)
sim_results
names(sim_results) <- c("n","phi","rho","kappa","beta","delta","correct_beta","correct_se","correct_t","ldv_beta","ldv_se","ldv_t","lagid_beta","lagid_se","lagid_t")

#create a backup
write.table(sim_results, file ="sim_results/sim_results_ldv.txt")

###########################################################################################################################################################################################plot
sim_results<-read.table("sim_results/sim_results_ldv.txt", header=TRUE)
attach(sim_results)
sim_results_backup <- sim_results
sim_results <- sim_results_backup

sim_results1 <- sim_results[sim_results$kappa==0 & sim_results$beta==2, ]
sim_results2 <- sim_results[sim_results$kappa==.5 & sim_results$beta==2, ]
sim_results3 <- sim_results[sim_results$kappa==2 & sim_results$beta==2, ]

sim_results7 <- sim_results[sim_results$kappa==0 & sim_results$beta==0, ]
sim_results8 <- sim_results[sim_results$kappa==.5 & sim_results$beta==0, ]
sim_results9 <- sim_results[sim_results$kappa==2 & sim_results$beta==0, ]


sim_results7$correct_t_sig <- ifelse(sim_results7$correct_t>=abs(qt(0.025,df=(5000-2))), 1, 0)
sim_results8$correct_t_sig <- ifelse(sim_results8$correct_t>=abs(qt(0.025,df=(5000-2))), 1, 0)
sim_results9$correct_t_sig <- ifelse(sim_results9$correct_t>=abs(qt(0.025,df=(5000-2))), 1, 0)

sim_results7$ldv_t_sig <- ifelse(sim_results7$ldv_t>=abs(qt(0.025,df=(5000-2))), 1, 0)
sim_results8$ldv_t_sig <- ifelse(sim_results8$ldv_t>=abs(qt(0.025,df=(5000-2))), 1, 0)
sim_results9$ldv_t_sig <- ifelse(sim_results9$ldv_t>=abs(qt(0.025,df=(5000-2))), 1, 0)

sim_results7$lagid_t_sig <- ifelse(sim_results7$lagid_t>=abs(qt(0.025,df=(5000-2))), 1, 0)
sim_results8$lagid_t_sig <- ifelse(sim_results8$lagid_t>=abs(qt(0.025,df=(5000-2))), 1, 0)
sim_results9$lagid_t_sig <- ifelse(sim_results9$lagid_t>=abs(qt(0.025,df=(5000-2))), 1, 0)


ols.correct1 <- c(tapply(sim_results1$correct_beta, INDEX = sim_results1$phi, mean))#compute the mean of coefficients from the correct model
ols.ldv1 <- c(tapply(sim_results1$ldv_beta, INDEX = sim_results1$phi, mean))#compute the mean of ldv estimates
ols.lagid1 <- c(tapply(sim_results1$lagid_beta, INDEX = sim_results1$phi, mean))#compute the mean of coefficients from the lag model

ols.correct2 <- c(tapply(sim_results2$correct_beta, INDEX = sim_results2$phi, mean))#compute the mean of coefficients from the correct model
ols.ldv2 <- c(tapply(sim_results2$ldv_beta, INDEX = sim_results2$phi, mean))#compute the mean of ldv estimates
ols.lagid2 <- c(tapply(sim_results2$lagid_beta, INDEX = sim_results2$phi, mean))#compute the mean of coefficients from the lag model

ols.correct3 <- c(tapply(sim_results3$correct_beta, INDEX = sim_results3$phi, mean))#compute the mean of coefficients from the correct model
ols.ldv3 <- c(tapply(sim_results3$ldv_beta, INDEX = sim_results3$phi, mean))#compute the mean of ldv estimates
ols.lagid3 <- c(tapply(sim_results3$lagid_beta, INDEX = sim_results3$phi, mean))#compute the mean of coefficients from the lag model

ols.correct.sig7 <- c(tapply(sim_results7$correct_t_sig, INDEX = sim_results1$phi, mean))
ols.ldv.sig7 <- c(tapply(sim_results7$ldv_t_sig, INDEX = sim_results1$phi, mean))
ols.lagid.sig7  <- c(tapply(sim_results7$lagid_t_sig, INDEX = sim_results1$phi, mean))

ols.correct.sig8 <- c(tapply(sim_results8$correct_t_sig, INDEX = sim_results2$phi, mean))
ols.ldv.sig8 <- c(tapply(sim_results8$ldv_t_sig, INDEX = sim_results2$phi, mean))
ols.lagid.sig8  <- c(tapply(sim_results8$lagid_t_sig, INDEX = sim_results2$phi, mean))

ols.correct.sig9 <- c(tapply(sim_results9$correct_t_sig, INDEX = sim_results3$phi, mean))
ols.ldv.sig9 <- c(tapply(sim_results9$ldv_t_sig, INDEX = sim_results3$phi, mean))
ols.lagid.sig9  <- c(tapply(sim_results9$lagid_t_sig, INDEX = sim_results3$phi, mean))



ols.correct.bias1 <- ols.correct1 - 2 #2 is the true parameter
ols.ldv.bias1 <- ols.ldv1 - 2
ols.lagid.bias1 <- ols.lagid1 - 2

ols.correct.rmse1 <- (sim_results1$correct_beta - 2)^2 #2 is the true parameter
ols.ldv.rmse1 <- (sim_results1$ldv_beta - 2)^2
ols.lagid.rmse1 <- (sim_results1$lagid_beta - 2)^2 #1 is beta*rho where beta is set at 2 and rho at 0.5.
ols.correct.rmse1 <- sqrt((rowsum(ols.correct.rmse1,sim_results1$phi))/100)
ols.ldv.rmse1 <- sqrt((rowsum(ols.ldv.rmse1,sim_results1$phi))/100)
ols.lagid.rmse1 <- sqrt((rowsum(ols.lagid.rmse1,sim_results1$phi))/100)
bias1 <- c(ols.correct.bias1,ols.ldv.bias1,ols.lagid.bias1)
bias1
rmse1 <- c(ols.correct.rmse1,ols.ldv.rmse1,ols.lagid.rmse1)
rmse1
t1 <- c(ols.correct.sig7,ols.ldv.sig7,ols.lagid.sig7)
t1

ols.correct.bias2 <- ols.correct2 - 2
ols.ldv.bias2 <- ols.ldv2 - 2
ols.lagid.bias2 <- ols.lagid2 - 2

ols.correct.rmse2 <- (sim_results2$correct_beta - 2)^2
ols.ldv.rmse2 <- (sim_results2$ldv_beta - 2)^2
ols.lagid.rmse2 <- (sim_results2$lagid_beta - 2)^2
ols.correct.rmse2 <- sqrt((rowsum(ols.correct.rmse2,sim_results2$phi))/100)
ols.ldv.rmse2 <- sqrt((rowsum(ols.ldv.rmse2,sim_results2$phi))/100)
ols.lagid.rmse2 <- sqrt((rowsum(ols.lagid.rmse2,sim_results2$phi))/100)
bias2 <- c(ols.correct.bias2,ols.ldv.bias2,ols.lagid.bias2)
bias2
rmse2 <- c(ols.correct.rmse2,ols.ldv.rmse2,ols.lagid.rmse2)
t2 <- c(ols.correct.sig8,ols.ldv.sig8,ols.lagid.sig8)
t2

ols.correct.bias3 <- ols.correct3 - 2
ols.ldv.bias3 <- ols.ldv3 - 2
ols.lagid.bias3 <- ols.lagid3 - 2
ols.correct.rmse3 <- (sim_results3$correct_beta - 2)^2
ols.ldv.rmse3 <- (sim_results3$ldv_beta - 2)^2
ols.lagid.rmse3 <- (sim_results3$lagid_beta - 2)^2
ols.correct.rmse3 <- sqrt((rowsum(ols.correct.rmse3,sim_results3$phi))/100)
ols.ldv.rmse3 <- sqrt((rowsum(ols.ldv.rmse3,sim_results3$phi))/100)
ols.lagid.rmse3 <- sqrt((rowsum(ols.lagid.rmse3,sim_results3$phi))/100)
bias3 <- c(ols.correct.bias3,ols.ldv.bias3,ols.lagid.bias3)
bias3
rmse3 <- c(ols.correct.rmse3,ols.ldv.rmse3,ols.lagid.rmse3)
rmse3
t3 <- c(ols.correct.sig9,ols.ldv.sig9,ols.lagid.sig9)
t3

model<-(rep(c(1, 2, 3), each=10))
b1 <- bias1[model<4]
m1 <- model
m1 <-factor(m1, labels=c("CORRECT", "LDV", "LAGID"))
phi1 <- (rep(c(seq(0,0.9,by=0.1)), times=3))
data1 <- data.frame(b1,m1,phi1)
rmse1 <- rmse1[model<4]
data.rmse1 <- data.frame(rmse1,m1,phi1)
t1 <- t1[model<4]
data.t1 <- data.frame(t1,m1,phi1)

b2 <- bias2[model<4]
m2 <- model
m2 <-factor(m2, labels=c("CORRECT", "LDV", "LAGID"))
phi2 <- (rep(c(seq(0,0.9,by=0.1)), times=3))
data2 <- data.frame(b2,m2,phi2)
rmse2 <- rmse2[model<4]
data.rmse2 <- data.frame(rmse2,m2,phi2)
t2 <- t2[model<4]
data.t2 <- data.frame(t2,m2,phi2)

b3 <- bias3[model<4]
m3 <- model
m3 <-factor(m3, labels=c("CORRECT", "LDV", "LAGID"))
phi3 <- (rep(c(seq(0,0.9,by=0.1)), times=3))
data3 <- data.frame(b3,m3,phi3)
rmse3 <- rmse3[model<4]
data.rmse3 <- data.frame(rmse3,m3,phi3)
t3 <- t3[model<4]
data.t3 <- data.frame(t3,m3,phi3)


colours<-c(CORRECT="green4", LDV="black", LAGID="red")
linetype<-c(CORRECT="dotted", LDV="dashed", LAGID="solid")

#bias in estimated coefficients
fmt <- function(){
  f <- function(x) as.character(round(x,2))
  f
}


d<-ggplot(data1, aes(x=phi1, y=b1, group=m1))
plot1<-d+labs(title=expression(paste("Bias, ",kappa,"=0, ",xi,"=2")))+geom_line(aes(group=factor(m1), colour=factor(m1), linetype=factor(m1))) +scale_x_continuous(expression(paste(phi)), limits=c(0,0.9), labels=fmt())+scale_y_continuous("Bias", limits=c(-.2,1), breaks=seq(from=0, to=2, by=0.5))+ scale_colour_manual(name="Method", values=colours)+scale_linetype_manual(name="Method", values=linetype)+geom_abline(intercept=0, slope=0, colour="darkgray") + theme_classic()

d<-ggplot(data2, aes(x=phi2, y=b2, group=m2))
plot2<-d+labs(title=expression(paste("Bias, ",kappa,"=.5, ",xi,"=2")))+geom_line(aes(group=factor(m2), colour=factor(m2), linetype=factor(m2))) +scale_x_continuous(expression(paste(phi)), limits=c(0,0.9), labels=fmt())+scale_y_continuous("Bias", limits=c(-.2,1), breaks=seq(from=0, to=2, by=0.5))+ scale_colour_manual(name="Method", values=colours)+scale_linetype_manual(name="Method", values=linetype)+geom_abline(intercept=0, slope=0, colour="darkgray") + theme_classic()

d<-ggplot(data3, aes(x=phi3, y=b3, group=m3))
plot3<-d+labs(title=expression(paste("Bias, ",kappa,"=2, ",xi,"=2")))+geom_line(aes(group=factor(m3), colour=factor(m3), linetype=factor(m3))) +scale_x_continuous(expression(paste(phi)), limits=c(0,0.9), labels=fmt())+scale_y_continuous("Bias", limits=c(-.2,1), breaks=seq(from=0, to=2, by=0.5))+ scale_colour_manual(name="Method", values=colours)+scale_linetype_manual(name="Method", values=linetype)+geom_abline(intercept=0, slope=0, colour="darkgray") + theme_classic()

d<-ggplot(data.rmse1, aes(x=phi1, y=rmse1, group=m1))
plot4<-d+labs(title=expression(paste("RMSE, ",kappa,"=0, ",xi,"=2")))+geom_line(aes(group=factor(m1), colour=factor(m1), linetype=factor(m1))) +scale_x_continuous(expression(paste(phi)), limits=c(0,0.9), labels=fmt())+scale_y_continuous("RMSE", limits=c(0,1), breaks=seq(from=0, to=1.5, by=0.5))+ scale_colour_manual(name="Method", values=colours)+scale_linetype_manual(name="Method", values=linetype)+geom_abline(intercept=0, slope=0, colour="darkgray") + theme_classic()

d<-ggplot(data.rmse2, aes(x=phi2, y=rmse2, group=m2))
plot5<-d+labs(title=expression(paste("RMSE, ",kappa,"=.5, ",xi,"=2")))+geom_line(aes(group=factor(m2), colour=factor(m2), linetype=factor(m2))) +scale_x_continuous(expression(paste(phi)), limits=c(0,0.9), labels=fmt())+scale_y_continuous("RMSE", limits=c(0,1), breaks=seq(from=0, to=1.5, by=0.5))+ scale_colour_manual(name="Method", values=colours)+scale_linetype_manual(name="Method", values=linetype)+geom_abline(intercept=0, slope=0, colour="darkgray") + theme_classic()

d<-ggplot(data.rmse3, aes(x=phi3, y=rmse3, group=m3))
plot6<-d+labs(title=expression(paste("RMSE, ",kappa,"=2, ",xi,"=2")))+geom_line(aes(group=factor(m3), colour=factor(m3), linetype=factor(m3))) +scale_x_continuous(expression(paste(phi)), limits=c(0,0.9), labels=fmt())+scale_y_continuous("RMSE", limits=c(0,1.5), breaks=seq(from=0, to=1.5, by=0.5))+ scale_colour_manual(name="Method", values=colours)+scale_linetype_manual(name="Method", values=linetype)+geom_abline(intercept=0, slope=0, colour="darkgray") + theme_classic()

#t
f<-ggplot(data.t1, aes(x=phi1, y=t1, group=m1))
plot7<-f+labs(title=expression(paste("Type-1 Error, ",kappa,"=0, ",xi,"=0")))+geom_line(aes(group=factor(m1), colour=factor(m1), linetype=factor(m1))) +scale_x_continuous(expression(paste(phi)), limits=c(0,0.9), labels=fmt())+scale_y_continuous("Frequency", limits=c(0,1), breaks=seq(from=0, to=1, by=0.5))+ scale_colour_manual(name="Method", values=colours)+scale_linetype_manual(name="Method", values=linetype)+geom_abline(intercept=0, slope=0, colour="darkgray") + theme_classic()

f<-ggplot(data.t2, aes(x=phi2, y=t2, group=m2))
plot8<-f+labs(title=expression(paste("Type-1 Error, ",kappa,"=.5, ",xi,"=0")))+geom_line(aes(group=factor(m2), colour=factor(m2), linetype=factor(m2))) +scale_x_continuous(expression(paste(phi)), limits=c(0,0.9), labels=fmt())+scale_y_continuous("Frequency", limits=c(0,1), breaks=seq(from=0, to=1, by=0.5))+ scale_colour_manual(name="Method", values=colours)+scale_linetype_manual(name="Method", values=linetype)+geom_abline(intercept=0, slope=0, colour="darkgray") + theme_classic()

f<-ggplot(data.t3, aes(x=phi3, y=t3, group=m3))
plot9<-f+labs(title=expression(paste("Type-1 Error, ",kappa,"=2, ",xi,"=0")))+geom_line(aes(group=factor(m3), colour=factor(m3), linetype=factor(m3))) +scale_x_continuous(expression(paste(phi)), limits=c(0,0.9), labels=fmt())+scale_y_continuous("Frequency", limits=c(0,1), breaks=seq(from=0, to=1, by=0.5))+ scale_colour_manual(name="Method", values=colours)+scale_linetype_manual(name="Method", values=linetype)+geom_abline(intercept=0, slope=0, colour="darkgray") + theme_classic()


#### combine plots, one legend
library(ggplot2)
library(gridExtra)

grid_arrange_shared_legend <- function(...) {
  plots <- list(...)
  g <- ggplotGrob(plots[[1]] + theme(legend.position="bottom"))$grobs
  legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]]
  lheight <- sum(legend$height)
  grid.arrange(arrangeGrob(grobs=lapply(plots, function(x)
      x + theme(legend.position="none")),ncol = 3),
    legend,
    ncol = 1,
    heights = unit.c(unit(1, "npc") - lheight, lheight))
}

grid_arrange_shared_legend(plot2, plot5, plot8, plot3, plot6, plot9)
plotted<-recordPlot()
pdf("figures/fig7.pdf", width=8, height=7)
replayPlot(plotted)
dev.off()

